Imputation of data for the GEMMA GWAS analysis

#!/bin/bash
#SBATCH --get-user-env
#SBATCH --clusters=biohpc_gen
#SBATCH --partition=biohpc_gen_normal
#SBATCH --cpus-per-task=4
#SBATCH --mem-per-cpu=4763mb
#SBATCH --time=24:00:00
#SBATCH -J imputation 
#SBATCH --error=%J.error
#SBATCH --array=1

# Impute missing genotypes in the filtered, sorted VCF with Beagle.
# sbatch requires the script to start with an interpreter line, hence the
# shebang above (matching the GEMMA job script).

# Load the user's shell setup so that `conda` is available in the batch shell.
source /dss/dsshome1/09/ra78pec/.bashrc

conda activate imputation 

# -Xmx12g keeps the JVM heap below the job allocation (4 CPUs x 4763 MB).
# nthreads is pinned to the SLURM allocation; without it Beagle defaults to
# every core on the node. Falls back to 4 when run outside of SLURM.
# NOTE: Beagle treats out= as a filename *prefix*, so the imputed genotypes
# are written to <prefix>.vcf.gz, i.e. imputation.vcf.vcf.gz here.
java -Xmx12g -jar /dss/dsshome1/09/ra78pec/beagle.22Jul22.46e.jar \
     gt=/dss/dsshome1/09/ra78pec/data/HR.168.filtered.sort.vcf.gz \
     out=/dss/dsshome1/09/ra78pec/data/imputation.vcf \
     nthreads="${SLURM_CPUS_PER_TASK:-4}"

GEMMA GWAS analysis

#!/bin/bash
#SBATCH --get-user-env
#SBATCH --clusters=biohpc_gen
#SBATCH --partition=biohpc_gen_normal
#SBATCH --cpus-per-task=4
#SBATCH --mem-per-cpu=4763mb
#SBATCH --time=2:00:00
#SBATCH -J gwas
#SBATCH --error=%J.err

# Run a GEMMA linear mixed model association scan on the PLINK binary fileset,
# correcting for relatedness with a precomputed kinship matrix.
# Each continuation backslash must be the LAST character on its line: a
# trailing space after it ends the command early and the remaining options
# (here -o) would be executed as a separate, failing command.
gemma -bfile /dss/dsshome1/09/ra78pec/GWAS/gemma_plink_subsetxs \
      -k /dss/dsshome1/09/ra78pec/output/relatedness.cXX.txt \
      -lmm 4 \
      -o gemma_output_categorical

# -lmm 4 performs all three tests: Wald test, likelihood ratio test and score test.
# -k is the relatedness matrix, which is calculated beforehand with:
#   gemma -bfile <plink_bed_prefix> -gk 1 -o relatedness
 

Plotting SNPs with p < 0.005 (p-values obtained from the Wald test, column p_wald)

library(tidyr)
library(dplyr)
library(ggplot2)
library(plotly)
library(manhattanly)

# Read the GEMMA association results (one row per SNP). Strings are kept as
# character so that chromosome labels are never turned into factor codes.
assoc_logistic <- read.table(
  file = "/Users/vicegill/Documents/gemma_output_categorical.assoc.txt",
  header = TRUE,
  stringsAsFactors = FALSE
)

# Keep only SNPs with a Wald-test p-value below 0.005, then convert the
# chromosome label to a number. Labels that are not numeric become NA and
# those rows are dropped. as.character() guards against `chr` being a factor,
# where as.numeric() alone would return level codes instead of the labels.
# The coercion warning is suppressed on purpose because the NA rows are
# removed explicitly right after.
assoc_logistic_filter <- assoc_logistic %>%
  dplyr::filter(p_wald < 0.005) %>%
  dplyr::mutate(chr = suppressWarnings(as.numeric(as.character(chr)))) %>%
  dplyr::filter(!is.na(chr))

# Build the Manhattan-plot object from chromosome (chr), base-pair position
# (ps) and Wald p-value (p_wald), then render the interactive plot.
manhattan_obj <- manhattanr(
  assoc_logistic_filter,
  chr = "chr",
  bp = "ps",
  p = "p_wald"
)
manhattanly(manhattan_obj)